
from constant_data import remove_after_filename, max_prompt

import sys
sys.path.append('/data/xingshiyou-slurm/codeLLM/GPT-respond')

from data_w_r import read_data, write_data_jsonl
from tqdm import tqdm

def sub_prefix(test, max_prompt):
    suffix = test['fim_suffix']
    prefix = ''.join(test['prefix'][-1 * (max_prompt - len(suffix)):].split('\n')[1:])
    return {'prefix': prefix, 
            'fim_suffix': suffix}



if __name__ == '__main__':
    # Load the records to process (source is decided inside read_data).
    records = read_data()

    trimmed_records = []
    for record in records:
        total_chars = len(record['prefix']) + len(record['fim_suffix'])
        if total_chars < max_prompt:
            # Already within the character budget: keep the record as-is.
            trimmed_records.append(record)
        else:
            # At or over the budget: cut the prefix down.
            trimmed_records.append(sub_prefix(record, max_prompt))

    # Hand the processed records to the project writer together with the
    # output filename constant.
    write_data_jsonl(trimmed_records, remove_after_filename)
